Jupyter at Bryn Mawr College — public notebooks: /services/public/dblank
Let's explore the XOR problem with different activation functions and input ranges.
from conx import Network
import numpy as np
import theano.tensor as T
import theano
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
%matplotlib inline
First, with the sigmoid. Theano comes with a sigmoid function:
def sigmoid(inputs):
    """Elementwise logistic sigmoid, 1 / (1 + exp(-x)).

    Thin wrapper over Theano's built-in; T.nnet.sigmoid could be
    passed directly wherever this is used.  (A ``def`` instead of an
    assigned lambda, per PEP 8 E731.)
    """
    return T.nnet.sigmoid(inputs)
Let's see the output of that function. However, calling a Theano expression builder directly doesn't compute anything — it only constructs a symbolic expression.
# Calling the builder returns a symbolic Theano expression, not a number.
sigmoid([0.5])
As you see, you just get back a symbolic expression (a node in Theano's computation graph), not a numeric result. You need to compile the Theano expression into a callable Python function:
# Declare a symbolic float vector, then compile the symbolic sigmoid
# expression into an executable Python function and evaluate it.
sym_in = T.vector(dtype=theano.config.floatX)
sigmoid_fn = theano.function([sym_in], sigmoid(sym_in))
sigmoid_fn([0.5])
To see the output of a range of values, let's plot the function over a range of inputs:
def plot_tf(tf, start, stop, title):
    """Plot the Theano expression builder *tf* over [start, stop).

    Compiles *tf* into an executable Python function, then delegates
    the actual plotting to plot_pf().
    """
    sym = T.vector(dtype=theano.config.floatX)
    compiled = theano.function([sym], tf(sym))
    plot_pf(compiled, start, stop, title)
def plot_pf(pf, start, stop, title):
    """Plot a Python function *pf* over [start, stop).

    Samples 100 evenly spaced points.  *pf* must accept a NumPy array
    and return a same-length sequence of outputs.
    """
    # Hoisted: the sample grid was previously computed twice.
    xs = np.arange(start, stop, (stop - start) / 100)
    plt.plot(xs, pf(xs))
    plt.title(title)
    plt.xlabel("input")
    plt.ylabel("output")  # fixed label typo: was "ouput"
plot_tf(sigmoid, -1, 1, "sigmoid(x)")
# Derivative: build a scalar sigmoid expression, differentiate it
# symbolically with T.grad, compile, then evaluate point by point.
sx = T.dscalar('x')
sig_of_x = T.nnet.sigmoid(sx)
dsigmoid = theano.function([sx], T.grad(sig_of_x, sx))
def pf(samples):
    return [dsigmoid(v) for v in samples]
plot_pf(pf, -1, 1, "d/dx sigmoid(x)")
# Build a 2-input, 2-hidden, 1-output conx network with the sigmoid
# activation defined above.
net = Network(2, 2, 1, activation_function=sigmoid) # or, T.nnet.sigmoid
net  # notebook display: shows the network object
# input low and high values:
ilo = -1
ihi = 1
# output low and high values:
olo = 0
ohi = 1
# XOR training set: matching inputs -> low target, differing -> high.
# NOTE: ilo/ihi/olo/ohi are module-level globals also read by
# test_net() and plot_net() below.
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
def test_net(net, res=20):
    """Sample the net's output over a res x res grid of the input
    square [ilo, ihi) x [ilo, ihi) and display it as a heat map.

    Reads the module-level globals ilo/ihi for the input range.
    """
    m = np.zeros((res, res))
    step = (ihi - ilo) / res
    for i in range(res):
        for j in range(res):
            # [0] extracts the single output unit's activation, matching
            # how plot_net() unpacks propagate()'s result; assigning the
            # raw sequence into a scalar cell errors on modern NumPy.
            m[i][j] = net.propagate([ilo + i * step, ilo + j * step])[0]
    plt.matshow(m,
                origin="lower",           # lower puts 0,0 bottom left
                extent=[ilo, ihi, ilo, ihi],  # tick label ranges
                cmap=cm.coolwarm,
                interpolation="none")
    plt.xlabel("input(0)")
    plt.ylabel("input(1)")
    plt.colorbar()
def plot_net(net, res=20, angle=45):
    """Render the net's output over the input square as a 3-D surface.

    Reads module-level globals ilo/ihi (input range) and olo/ohi
    (z-axis limits).  *angle* rotates the view about the z axis.
    """
    fig = plt.figure()
    # fig.gca(projection='3d') was deprecated and removed (Matplotlib
    # 3.6); add_subplot is the supported way to make a 3-D axes.
    ax = fig.add_subplot(projection='3d')
    # Make data.
    X = np.arange(ilo, ihi, (ihi - ilo) / res)
    Y = np.arange(ilo, ihi, (ihi - ilo) / res)
    # propagate() returns a sequence; [0] is the single output unit.
    Z = np.array([[net.propagate([x, y])[0] for x in X] for y in Y])
    X, Y = np.meshgrid(X, Y)
    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    # Customize the z axis.
    ax.set_zlim(olo - 0.01, ohi + 0.01)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.view_init(30, angle)  # tilt, rotate
    # Add a color bar which maps values to colors.
    fig.colorbar(surf, shrink=0.5, aspect=5)
# Show the untrained network's response surface first (random weights).
net.reset()
test_net(net)
plot_net(net)
# Keep restarting from fresh random weights and retraining until the
# network gets every pattern right.  NOTE(review): assumes train()
# updates last_cv_percent (conx API) — confirm.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net)
# Same experiment with a narrower input range: [-0.5, 0.5].
net = Network(2, 2, 1, activation_function=sigmoid)
# input low and high values:
ilo = -0.5
ihi = 0.5
# output low and high values:
olo = 0
ohi = 1
# XOR truth table over the new range.
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
# Untrained baseline.
net.reset()
test_net(net)
plot_net(net)
# Restart + retrain until every pattern is classified correctly.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net)
net.test()  # report per-pattern results (conx)
Let's try an adjusted sigmoid that is moved over a bit:
def sigmoid_offset(inputs):
    """Sigmoid shifted right by 0.5, so its midpoint falls at x = 0.5
    (the center of the [0, 1] input range used below).

    (A ``def`` instead of an assigned lambda, per PEP 8 E731.)
    """
    return T.nnet.sigmoid(inputs - 0.5)
plot_tf(sigmoid_offset, 0, 1, "sigmoid(x - 0.5)")
# Derivative of the shifted sigmoid over the same range.
x = T.dscalar('x')
y = T.nnet.sigmoid(x - 0.5)
dx = theano.function([x], T.grad(y, x))
def pf(xs):
    return [dx(v) for v in xs]
plot_pf(pf, 0, 1, "d/dx sigmoid(x - 0.5)")
# Train on inputs in [0, 1] using the offset sigmoid defined above.
net = Network(2, 2, 1, activation_function=sigmoid_offset)
# input low and high values:
ilo = 0
ihi = 1
# output low and high values:
olo = 0
ohi = 1
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
# Restart + retrain until all patterns are correct.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net, angle=-45)  # rotated view of the surface
net.test()
# tanh and its symbolic derivative over [-1, 1].
plot_tf(T.tanh, -1, 1, "tanh(x)")
x = T.dscalar('x')
tanh_expr = T.tanh(x)
dtanh = theano.function([x], T.grad(tanh_expr, x))
def pf(samples):
    return [dtanh(v) for v in samples]
plot_pf(pf, -1, 1, "d/dx tanh(x)")
# tanh activation with inputs in [-1, 1]; targets remain 0/1.
# NOTE(review): tanh's output range is (-1, 1), so 0/1 targets are
# reachable but not symmetric about tanh's midpoint — see the -1/1
# variant a few cells down.
net = Network(2, 2, 1, activation_function=T.tanh)
# input low and high values:
ilo = -1
ihi = 1
# output low and high values:
olo = 0.0
ohi = 1.0
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
# Restart + retrain until all patterns are correct.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net, angle=-45)
net.test()
This seems to work well, but only in batch mode.
# tanh network again, now with targets -1/1 (matching tanh's natural
# output range) and weight updates applied once per epoch.
net = Network(2, 2, 1, activation_function=T.tanh)
net.batch = True  # update weights after each full epoch, not per pattern
# input low and high values:
ilo = -1
ihi = 1
# output low and high values:
olo = -1.0
ohi = 1.0
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
# Restart + retrain until all patterns are correct.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net)
net.test()
conx supports a batch mode that updates the weights only once per epoch, rather than after every pattern.
# Batch-mode tanh once more, but back to 0/1 targets.
net = Network(2, 2, 1, activation_function=T.tanh)
net.batch = True  # epoch-wise weight updates
# input low and high values:
ilo = -1
ihi = 1
# output low and high values:
olo = 0
ohi = 1
net.set_inputs([[[ilo, ilo], [olo]], [[ihi, ihi], [olo]], [[ilo, ihi], [ohi]], [[ihi, ilo], [ohi]]])
# Restart + retrain until all patterns are correct.
while net.last_cv_percent != 1.0:
    net.reset()
    net.train()
test_net(net)
plot_net(net)